In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from catboost import CatBoostClassifier
from sklearn.model_selection import GridSearchCV

def cat_model_init(X_train, y_train):
    """Fit an untuned CatBoost classifier on the given training data.

    The classifier is created with ``silent=True`` and no other explicit
    settings, fitted on ``(X_train, y_train)``, and returned.

    Parameters
    ----------
    X_train
        Training features, forwarded unchanged to ``fit``.
    y_train
        Training labels, forwarded unchanged to ``fit``.

    Returns
    -------
    CatBoostClassifier
        The fitted classifier instance.
    """
    baseline_params = {'silent': True}
    baseline = CatBoostClassifier(**baseline_params)
    baseline.fit(X_train, y_train)
    return baseline

def cat_model_tuned(X_train, y_train, random_state=None, param_grid=None,
                    cv=4, n_jobs=-1, scoring=None):
    """Grid-search CatBoost hyperparameters and return the best refit model.

    Called as ``cat_model_tuned(X_train, y_train)`` this behaves exactly like
    the previous hard-coded version; the extra keyword arguments only expose
    settings that used to be fixed inside the function.

    Parameters
    ----------
    X_train
        Training features, forwarded unchanged to ``GridSearchCV.fit``.
    y_train
        Training labels, forwarded unchanged to ``GridSearchCV.fit``.
    random_state : int or None, default None
        When given, passed to the base estimator as ``random_seed`` so the
        caller controls the model seed. ``None`` leaves CatBoost's own
        default seeding in place.
    param_grid : dict or None, default None
        Grid handed to ``GridSearchCV``. ``None`` selects the built-in grid
        (18 combinations of depth, learning rate and iteration count, all
        with ``silent=True``). A custom grid replaces it entirely, so it must
        set CatBoost's logging option itself if quiet training is wanted.
    cv : int or CV splitter, default 4
        Cross-validation setting handed to ``GridSearchCV``.
    n_jobs : int or None, default -1
        Parallel worker setting handed to ``GridSearchCV``.
        NOTE(review): CatBoost is itself multi-threaded, so ``-1`` may
        oversubscribe CPU cores; confirm on the target machine.
    scoring : str, callable or None, default None
        Scoring handed to ``GridSearchCV``; ``None`` uses the estimator's
        own ``score`` method.

    Returns
    -------
    CatBoostClassifier
        ``best_estimator_`` of the fitted search.
    """
    if param_grid is None:
        # Build a fresh dict on every call so callers can never mutate a
        # shared default.
        param_grid = {'silent': [True],
                      'max_depth': [5, 8, 10],
                      'learning_rate': [0.1, 0.003, 0.001],
                      'iterations': [50, 100]}

    # Only pass the seed when one was requested, so the default call still
    # constructs the estimator with no arguments, as before.
    base_params = {} if random_state is None else {'random_seed': random_state}
    clf = CatBoostClassifier(**base_params)

    grid = GridSearchCV(estimator=clf, param_grid=param_grid,
                        cv=cv, verbose=5, n_jobs=n_jobs, scoring=scoring)
    grid.fit(X_train, y_train)

    return grid.best_estimator_
In [2]:
# Seed reused for the train/test split below.
seed = 5764

# Load the preprocessed table; the first CSV column becomes the index.
data = pd.read_csv('preprocessing1.csv', encoding='cp949', index_col=0)

# Separate the label column from the features. `data` keeps only the feature
# columns from here on (later cells read `data.columns` as feature names).
target = data['Status']
data = data.drop(columns=['Status'])

# Split into training and test sets (0.7 : 0.3)
x_train, x_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.3,
    random_state=seed,
)
In [3]:
# Run the hyperparameter grid search on the training split and keep the best
# estimator; the LIME and SHAP cells below explain this model.
cat_model = cat_model_tuned(x_train,y_train)
Fitting 4 folds for each of 18 candidates, totalling 72 fits
In [4]:
import numpy as np
from lime import lime_tabular

# LIME explainer built on the training features. `random_state=seed` makes
# the perturbation sampling repeatable, so re-running this cell yields the
# same explanations.
cat_lime_explainer = lime_tabular.LimeTabularExplainer(
    np.array(x_train),
    feature_names=list(data.columns),
    mode="classification",
    random_state=seed,
)

# Explain the first test rows (at most 10; fewer if the test split is smaller).
for i in range(min(10, len(x_test))):
    # LIME documents `data_row` as a 1d numpy array and indexes it by integer
    # position internally, so hand it the row's values rather than a
    # label-indexed pandas Series.
    cat_lime_explanation = cat_lime_explainer.explain_instance(
        x_test.iloc[i].to_numpy(), cat_model.predict_proba
    )
    cat_lime_explanation.show_in_notebook(show_table=True)
    print(i)  # label each rendered explanation with its test-row position
0
1
2
3
4
5
6
7
8
9
In [12]:
import shap 

# Build a SHAP explainer for the tuned model and compute SHAP values for every
# row of the test split; the plotting cells below reuse `shap_values`.
cat_shap_explainer = shap.Explainer(cat_model)
shap_values = cat_shap_explainer.shap_values(x_test)
In [20]:
# SHAP summary plot of the computed values across all test-set features.
shap.summary_plot(shap_values,x_test)
In [22]:
# SHAP dependence plot for the 'rate_of_interest' feature on the test set.
shap.dependence_plot('rate_of_interest', shap_values, x_test)
In [23]:
# SHAP dependence plot for the 'credit_type_EQUI' feature on the test set.
shap.dependence_plot('credit_type_EQUI', shap_values, x_test)